import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# The first command-line argument is a directory: it is added to the import
# path here, and the same directory is where credit_customers.csv is read from
# further down.
# NOTE(review): presumably this is what makes `decision_company` importable - confirm.
# Running the script without that argument raises IndexError on this line.
sys.path.append(sys.argv[1])

import pandas as pd   
import pickle
from decision_company import read_csv_file, fetch_column, calculate_quantile
  
# Read credit_customers.csv from the directory passed as the first CLI argument.
credit_customers = read_csv_file(
    os.path.join(sys.argv[1], 'credit_customers.csv')
)

# Credit amount: pull the column, then take its 0.75 quantile.
credit_amount_column = fetch_column(credit_customers, 'credit_amount')
credit_amount_75th = calculate_quantile(credit_amount_column, 0.75)

# Loan duration: same treatment.
duration_column = fetch_column(credit_customers, 'duration')
duration_75th = calculate_quantile(duration_column, 0.75)

print(f"credit_amount_75th: {credit_amount_75th}, duration_75th: {duration_75th}")
# pickle.dump(credit_amount_75th, open("./ref_result/credit_amount_75th.pkl","wb"))
# pickle.dump(duration_75th, open("./ref_result/duration_75th.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, create_condition, logical_and, filter_by_condition
  
# Nothing is reloaded here: this step reuses credit_customers, the two columns
# and the 75th-percentile thresholds computed earlier in the script.

# Build one condition per column against its 75th-percentile threshold.
# NOTE(review): create_condition presumably yields a boolean mask such as
# "column > threshold" - confirm the comparison used in decision_company.
credit_amount_condition = create_condition(credit_amount_column, credit_amount_75th)
duration_condition = create_condition(duration_column, duration_75th)

# Combine conditions: a row must satisfy both.
combined_condition = logical_and(credit_amount_condition, duration_condition)

# Filter the dataset based on the combined condition.
high_credit_long_duration = filter_by_condition(credit_customers, combined_condition)

print(f"high_credit_long_duration: {high_credit_long_duration}")
# pickle.dump(high_credit_long_duration, open("./ref_result/high_credit_long_duration.pkl","wb"))

import pandas as pd   
import pickle
from decision_company import read_csv_file
  
# Nothing is reloaded here: this step works on high_credit_long_duration.

# Keep only the credit amount and loan duration columns.
# NOTE(review): no explicit client-ID column is selected; presumably the IDs
# are carried by the row index - confirm.
high_credit_long_duration_filtered = high_credit_long_duration[['credit_amount', 'duration']]

# The printed label says "high_credit_long_duration", but the value shown is
# the two-column subset.
print(f"high_credit_long_duration: {high_credit_long_duration_filtered}")
# pickle.dump(high_credit_long_duration_filtered, open("./ref_result/high_credit_long_duration_filtered.pkl","wb"))

import pandas as pd   
import pickle
from decision_company import read_csv_file, convert_to_tuples
  
# Nothing is reloaded here: this step works on high_credit_long_duration_filtered.

# Convert the two-column subset to a list of tuples.
# NOTE(review): presumably one (credit_amount, duration) tuple per row - confirm
# what convert_to_tuples yields (e.g. whether the index is included).
result_list = list(convert_to_tuples(high_credit_long_duration_filtered))

# The printed label says "high_credit_long_duration", but the value shown is result_list.
# result_list is rebound several times later in this script.
print(f"high_credit_long_duration: {result_list}")
# pickle.dump(result_list, open("./ref_result/result_list.pkl","wb"))

import pandas as pd   
import pickle
from decision_company import read_csv_file, filter_by_value

# Nothing is reloaded here: this step works on high_credit_long_duration.

# Select the high-credit/long-duration clients by credit history.
# NOTE(review): filter_by_value presumably keeps rows whose 'credit_history'
# equals 'delayed previously' - confirm against decision_company.
late_payments = filter_by_value(high_credit_long_duration, 'credit_history', 'delayed previously')

print(f"late_payments: {late_payments}")
# pickle.dump(late_payments, open("./ref_result/late_payments.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, fetch_index

# Nothing is reloaded here: this step works on late_payments.

# Extract the client IDs.
# NOTE(review): the script treats the index returned by fetch_index as the
# client IDs - confirm the CSV has no separate ID column.
result = fetch_index(late_payments)

print(f"result_IDs: {result}")
# pickle.dump(result, open("./ref_result/result_IDs.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, convert_to_list

# Nothing is reloaded here: this step works on `result` (the late-payment index).

# Convert the result to a list. This rebinds result_list, replacing the list of
# tuples built earlier in the script.
result_list = convert_to_list(result)

print(f"result_list: {result_list}")
# pickle.dump(result_list, open("./ref_result/result_list.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, calculate_median, filter_by_condition, create_condition, logical_and

# Nothing is reloaded here: this step narrows high_credit_long_duration to
# clients with multiple existing credits and a high installment commitment.

# The "high commitment" threshold is the median over the FULL dataset, not
# just over the high-credit/long-duration subset.
installment_commitment_median = calculate_median(credit_customers, 'installment_commitment')

# Both masks are built from the frame that is actually being filtered, so
# their index matches it row for row. Building the installment mask from
# credit_customers (as before) produced a mask over a different index, which
# only worked through implicit index alignment during the AND and the filter.
# NOTE(review): create_condition presumably yields "column > threshold" - confirm.
existing_credits_condition = create_condition(
    high_credit_long_duration['existing_credits'], 1
)
installment_commitment_condition = create_condition(
    high_credit_long_duration['installment_commitment'], installment_commitment_median
)
multiple_credits_high_commitment = filter_by_condition(
    high_credit_long_duration,
    logical_and(existing_credits_condition, installment_commitment_condition),
)

print(f"multiple_credits_high_commitment: {multiple_credits_high_commitment}")
# pickle.dump(multiple_credits_high_commitment, open("./ref_result/multiple_credits_high_commitment.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, fetch_index

# Nothing is reloaded here: this step works on multiple_credits_high_commitment.

# Extract the client IDs (the index of the filtered rows). This rebinds
# `result`, which previously held the late-payment index.
result = fetch_index(multiple_credits_high_commitment)

print(f"multiple_credits_high_commitment_idx: {result}")
# pickle.dump(result, open("./ref_result/multiple_credits_high_commitment_idx.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, convert_to_list

# Nothing is reloaded here: this step works on `result` from the previous step.

# Convert the result to a list (rebinds result_list again).
result_list = convert_to_list(result)

print(f"result_list: {result_list}")
# pickle.dump(result_list, open("./ref_result/result_list_3.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, filter_by_condition

# Nothing is reloaded here: this step works on high_credit_long_duration.

# Create the condition: age of at least 25.
condition1 = high_credit_long_duration['age'] >= 25
# Only the lower age bound is applied here; the 25-to-55 range filter is built
# in the following step.
clients_min_age = filter_by_condition(high_credit_long_duration, condition1)

print(f"clients_min_age: {clients_min_age}")
# pickle.dump(clients_min_age, open("./ref_result/clients_min_age.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, logical_and, filter_by_condition

# Age-range filter over high_credit_long_duration (nothing is reloaded here).

# Lower and upper bound masks, both inclusive, built from one column lookup.
_age = high_credit_long_duration['age']
condition1, condition2 = _age >= 25, _age <= 55

# A client qualifies only when both bounds hold.
final_condition = logical_and(condition1, condition2)

# Keep the clients aged between 25 and 55.
clients_25_to_55 = filter_by_condition(
    high_credit_long_duration,
    final_condition,
)

print(f"clients_25_to_55: {clients_25_to_55}")
# pickle.dump(clients_25_to_55, open("./ref_result/clients_25_to_55.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, count_rows

# Nothing is reloaded here: this step works on clients_25_to_55.

# Calculate the count of clients aged between 25 and 55.
result_count = count_rows(clients_25_to_55)

print(f"result_count: {result_count}")
# pickle.dump(result_count, open("./ref_result/result_count.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, logical_or, filter_by_condition

# Stable-employment filter over high_credit_long_duration (nothing is reloaded here).

# One mask per employment bracket that counts as stable, built from one column lookup.
_employment = high_credit_long_duration['employment']
condition1, condition2 = _employment == '4<=X<7', _employment == '>=7'

# Either bracket is sufficient.
final_condition = logical_or(condition1, condition2)

# Keep the clients with stable employment (4 years or more).
stable_employment = filter_by_condition(
    high_credit_long_duration,
    final_condition,
)

print(f"stable_employment: {stable_employment}")
# pickle.dump(stable_employment, open("./ref_result/stable_employment.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, fetch_index

# Nothing is reloaded here: this step works on stable_employment.

# Extract the client IDs (the index of the stable-employment rows).
stable_employment_idx = fetch_index(stable_employment)

print(f"stable_employment_idx: {stable_employment_idx}")
# pickle.dump(stable_employment_idx, open("./ref_result/stable_employment_idx.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, convert_to_list

# Nothing is reloaded here: this step works on stable_employment_idx.

# Convert the result to a list (rebinds result_list again).
result_list = convert_to_list(stable_employment_idx)

print(f"result_list: {result_list}")
# pickle.dump(result_list, open("./ref_result/result_list_4.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, filter_by_condition

# Nothing is reloaded here: this step works on stable_employment.

# Create the condition: housing is 'rent'. (Rebinds condition1.)
condition1 = stable_employment['housing'] == 'rent'

# Filter the stable-employment clients residing in rented housing only.
rented_housing = filter_by_condition(stable_employment, condition1)

print(f"rented_housing: {rented_housing}")
# pickle.dump(rented_housing, open("./ref_result/rented_housing.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, filter_by_condition

# Nothing is reloaded here: this step works on stable_employment.

# Create the condition: housing is 'own'. (Rebinds condition2.)
condition2 = stable_employment['housing'] == 'own'

# Filter the stable-employment clients residing in owned housing only.
owned_housing = filter_by_condition(stable_employment, condition2)

print(f"owned_housing: {owned_housing}")
# pickle.dump(owned_housing, open("./ref_result/owned_housing.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, concatenate_objects

# Nothing is reloaded here: this step combines rented_housing and owned_housing.

# Rented rows are passed first, owned rows second.
# NOTE(review): reset_index_flag=False presumably keeps the original row index
# (the client IDs) instead of renumbering - confirm in decision_company.
rented_owned_housing = concatenate_objects(rented_housing, owned_housing, reset_index_flag=False)

print(f"rented_owned_housing: {rented_owned_housing}")
# pickle.dump(rented_owned_housing, open("./ref_result/rented_owned_housing.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, count_rows

# Nothing is reloaded here: this step works on rented_owned_housing.

# Calculate the count of clients residing in rented or owned housing.
# This rebinds result_count, which previously held the 25-to-55 age count.
result_count = count_rows(rented_owned_housing)

print(f"result_count: {result_count}")
# pickle.dump(result_count, open("./ref_result/result_count_2.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, avg

# Nothing is reloaded here: this step works on stable_employment.

# Fetch the 'credit_amount' column of the stable-employment clients.
# This rebinds credit_amount_column, which earlier held the column of the full
# dataset.
credit_amount_column = stable_employment['credit_amount']

# Calculate the average credit amount.
average_credit_amount = avg(credit_amount_column)

print(f"average_credit_amount: {average_credit_amount}")
# pickle.dump(average_credit_amount, open("./ref_result/average_credit_amount.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, avg

# Nothing is reloaded here: this step works on stable_employment.

# Fetch the 'duration' column of the stable-employment clients.
# This rebinds duration_column, which earlier held the column of the full
# dataset.
duration_column = stable_employment['duration']

# Calculate the average loan duration.
average_loan_duration = avg(duration_column)

print(f"average_loan_duration: {average_loan_duration}")
# pickle.dump(average_loan_duration, open("./ref_result/average_loan_duration.pkl","wb"))

  

import pandas as pd   
import pickle
from decision_company import read_csv_file, locate_mode, visit_by_index

# Nothing is reloaded here: this step works on stable_employment.

# Fetch the 'employment' column of the stable-employment clients.
employment_column = stable_employment['employment']

# Find the most common employment status: take the entry at position 0 of the
# mode result.
# NOTE(review): locate_mode presumably returns every tied mode, so ties are
# resolved by whichever value comes first - confirm.
most_common_employment = visit_by_index(locate_mode(employment_column),0)

print(f"most_common_employment: {most_common_employment}")
# pickle.dump(most_common_employment, open("./ref_result/most_common_employment.pkl","wb"))

  

import pandas as pd
from sklearn.preprocessing import LabelEncoder    
import pickle
from decision_company import read_csv_file, df_copy, encoder_instance, encode_column

# Nothing is reloaded here: this step label-encodes a copy of stable_employment.

# Encode into a separate frame obtained from df_copy, not into
# stable_employment itself.
stable_employment_encoded = df_copy(stable_employment)

# One encoder object is reused for every column.
# NOTE(review): this assumes encode_column refits the encoder on each call
# (fit_transform-style) - confirm in decision_company.
encoder = encoder_instance()

# Convert every non-numeric column to numeric values using label encoding.
# The test is "not numeric" rather than "dtype == 'object'" so that text held
# in pandas string or categorical dtypes is encoded too, instead of being
# silently left as-is.
for column in stable_employment_encoded.columns:
    if not pd.api.types.is_numeric_dtype(stable_employment_encoded[column]):
        stable_employment_encoded[column] = encode_column(encoder, stable_employment_encoded[column])

print(f"stable_employment_encoded: {stable_employment_encoded}")
# pickle.dump(stable_employment_encoded, open("./ref_result/stable_employment_encoded.pkl","wb"))

  

